package com.david.http;

/*
 * Copyright (c) 2002-2015 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.commons.io.Charsets;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.Header;

/**
 * Sniffs encoding settings from HTML, XML or other content. The HTML encoding
 * sniffing algorithm is based on the <a href=
 * "http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding"
 * >HTML5 encoding sniffing algorithm</a>.
 *
 * @version $Revision: 10768 $
 * @author Daniel Gredler
 * @author Ahmed Ashour
 * @author Ronald Brill
 */
public final class EncodingSniffer {

	/**
	 * Logger for this class; the sniffing methods use it to report, at debug
	 * level, where an encoding was found.
	 */
	private static final Log LOG = LogFactory.getLog(EncodingSniffer.class);

	/** Name of the little-endian UTF-16 charset. */
	static final String UTF16_LE = "UTF-16LE";

	/** Name of the big-endian UTF-16 charset. */
	static final String UTF16_BE = "UTF-16BE";

	/** Name of the UTF-8 charset. */
	static final String UTF8 = "UTF-8";

	/**
	 * Byte pattern for the start of a comment (<tt>&lt;!--</tt>). Each inner
	 * array lists the byte values given for one position of the pattern.
	 */
	private static final byte[][] COMMENT_START = { { '<' }, { '!' }, { '-' },
			{ '-' } };

	/**
	 * Byte pattern for the start of a <tt>meta</tt> HTML tag: <tt>&lt;meta</tt>
	 * in either letter case, followed by a tab, line feed, form feed, carriage
	 * return, space or slash.
	 */
	private static final byte[][] META_START = { { '<' }, { 'm', 'M' },
			{ 'e', 'E' }, { 't', 'T' }, { 'a', 'A' },
			{ 0x09, 0x0A, 0x0C, 0x0D, 0x20, 0x2F } };

	/**
	 * Byte pattern for the start of miscellaneous HTML content: <tt>&lt;</tt>
	 * followed by <tt>!</tt>, <tt>/</tt> or <tt>?</tt>.
	 */
	private static final byte[][] OTHER_START = { { '<' }, { '!', '/', '?' } };

	/**
	 * Byte pattern for the start of a charset specification: the word
	 * <tt>charset</tt> in either letter case.
	 */
	private static final byte[][] CHARSET_START = { { 'c', 'C' }, { 'h', 'H' },
			{ 'a', 'A' }, { 'r', 'R' }, { 's', 'S' }, { 'e', 'E' },
			{ 't', 'T' } };

	/**
	 * Maps each lower-case encoding label to the name of the encoding it
	 * denotes, as listed in the <a
	 * href="http://encoding.spec.whatwg.org/#encodings">WHATWG Encoding
	 * Standard</a>. Every label appears exactly once.
	 */
	private static final Map<String, String> ENCODING_FROM_LABEL;
	static {
		ENCODING_FROM_LABEL = new HashMap<String, String>();

		// The Encoding
		// ------------
		registerLabels("utf-8", "unicode-1-1-utf-8", "utf-8", "utf8");

		// Legacy single-byte encodings
		// ----------------------------
		registerLabels("ibm866", "866", "cp866", "csibm866", "ibm866");
		registerLabels("iso-8859-2", "csisolatin2", "iso-8859-2",
				"iso-ir-101", "iso8859-2", "iso88592", "iso_8859-2",
				"iso_8859-2:1987", "l2", "latin2");
		registerLabels("iso-8859-3", "csisolatin3", "iso-8859-3",
				"iso-ir-109", "iso8859-3", "iso88593", "iso_8859-3",
				"iso_8859-3:1988", "l3", "latin3");
		registerLabels("iso-8859-4", "csisolatin4", "iso-8859-4",
				"iso-ir-110", "iso8859-4", "iso88594", "iso_8859-4",
				"iso_8859-4:1988", "l4", "latin4");
		registerLabels("iso-8859-5", "csisolatincyrillic", "cyrillic",
				"iso-8859-5", "iso-ir-144", "iso8859-5", "iso88595",
				"iso_8859-5", "iso_8859-5:1988");
		registerLabels("iso-8859-6", "arabic", "asmo-708", "csiso88596e",
				"csiso88596i", "csisolatinarabic", "ecma-114", "iso-8859-6",
				"iso-8859-6-e", "iso-8859-6-i", "iso-ir-127", "iso8859-6",
				"iso88596", "iso_8859-6", "iso_8859-6:1987");
		registerLabels("iso-8859-7", "csisolatingreek", "ecma-118",
				"elot_928", "greek", "greek8", "iso-8859-7", "iso-ir-126",
				"iso8859-7", "iso88597", "iso_8859-7", "iso_8859-7:1987",
				"sun_eu_greek");
		registerLabels("iso-8859-8", "csiso88598e", "csisolatinhebrew",
				"hebrew", "iso-8859-8", "iso-8859-8-e", "iso-ir-138",
				"iso8859-8", "iso88598", "iso_8859-8", "iso_8859-8:1988",
				"visual");
		registerLabels("iso-8859-8-i", "csiso88598i", "iso-8859-8-i",
				"logical");
		registerLabels("iso-8859-10", "csisolatin6", "iso-8859-10",
				"iso-ir-157", "iso8859-10", "iso885910", "l6", "latin6");
		registerLabels("iso-8859-13", "iso-8859-13", "iso8859-13",
				"iso885913");
		registerLabels("iso-8859-14", "iso-8859-14", "iso8859-14",
				"iso885914");
		registerLabels("iso-8859-15", "csisolatin9", "iso-8859-15",
				"iso8859-15", "iso885915", "iso_8859-15", "l9");
		registerLabels("iso-8859-16", "iso-8859-16");
		registerLabels("koi8-r", "cskoi8r", "koi", "koi8", "koi8-r", "koi8_r");
		registerLabels("koi8-u", "koi8-u");
		registerLabels("macintosh", "csmacintosh", "mac", "macintosh",
				"x-mac-roman");
		registerLabels("windows-874", "dos-874", "iso-8859-11", "iso8859-11",
				"iso885911", "tis-620", "windows-874");
		registerLabels("windows-1250", "cp1250", "windows-1250", "x-cp1250");
		registerLabels("windows-1251", "cp1251", "windows-1251", "x-cp1251");
		registerLabels("windows-1252", "ansi_x3.4-1968", "ascii", "cp1252",
				"cp819", "csisolatin1", "ibm819", "iso-8859-1", "iso-ir-100",
				"iso8859-1", "iso88591", "iso_8859-1", "iso_8859-1:1987",
				"l1", "latin1", "us-ascii", "windows-1252", "x-cp1252");
		registerLabels("windows-1253", "cp1253", "windows-1253", "x-cp1253");
		registerLabels("windows-1254", "cp1254", "csisolatin5", "iso-8859-9",
				"iso-ir-148", "iso8859-9", "iso88599", "iso_8859-9",
				"iso_8859-9:1989", "l5", "latin5", "windows-1254", "x-cp1254");
		registerLabels("windows-1255", "cp1255", "windows-1255", "x-cp1255");
		registerLabels("windows-1256", "cp1256", "windows-1256", "x-cp1256");
		registerLabels("windows-1257", "cp1257", "windows-1257", "x-cp1257");
		registerLabels("windows-1258", "cp1258", "windows-1258", "x-cp1258");
		registerLabels("x-mac-cyrillic", "x-mac-cyrillic", "x-mac-ukrainian");

		// Legacy multi-byte Chinese (simplified) encodings
		// ------------------------------------------------
		registerLabels("gb18030", "chinese", "csgb2312", "csiso58gb231280",
				"gb18030", "gb2312", "gb_2312", "gb_2312-80", "gbk",
				"iso-ir-58", "x-gbk");
		registerLabels("hz-gb-2312", "hz-gb-2312");

		// Legacy multi-byte Chinese (traditional) encodings
		// -------------------------------------------------
		registerLabels("big5", "big5", "big5-hkscs", "cn-big5", "csbig5",
				"x-x-big5");

		// Legacy multi-byte Japanese encodings
		// ------------------------------------
		registerLabels("euc-jp", "cseucpkdfmtjapanese", "euc-jp", "x-euc-jp");
		registerLabels("iso-2022-jp", "csiso2022jp", "iso-2022-jp");
		registerLabels("shift_jis", "csshiftjis", "ms_kanji", "shift-jis",
				"shift_jis", "sjis", "windows-31j", "x-sjis");

		// Legacy multi-byte Korean encodings
		// ----------------------------------
		registerLabels("euc-kr", "cseuckr", "csksc56011987", "euc-kr",
				"iso-ir-149", "korean", "ks_c_5601-1987", "ks_c_5601-1989",
				"ksc5601", "ksc_5601", "windows-949");

		// Legacy miscellaneous encodings
		// ------------------------------
		registerLabels("replacement", "csiso2022kr", "iso-2022-cn",
				"iso-2022-cn-ext", "iso-2022-kr");
		registerLabels("utf-16be", "utf-16be");
		registerLabels("utf-16le", "utf-16", "utf-16le");
		registerLabels("x-user-defined", "x-user-defined");
	}

	/**
	 * Adds one entry to {@link #ENCODING_FROM_LABEL} for each of the specified
	 * labels, all pointing at the specified encoding.
	 *
	 * @param encoding
	 *            the name of the encoding the labels denote
	 * @param labels
	 *            the lower-case labels to register for the encoding
	 * @throws IllegalStateException
	 *             if a label is already registered for a different encoding;
	 *             a later <tt>put</tt> would otherwise silently overwrite the
	 *             earlier, correct mapping
	 */
	private static void registerLabels(final String encoding,
			final String... labels) {
		for (final String label : labels) {
			final String previous = ENCODING_FROM_LABEL.put(label, encoding);
			if (previous != null && !previous.equals(encoding)) {
				throw new IllegalStateException("Encoding label '" + label
						+ "' is mapped to both '" + previous + "' and '"
						+ encoding + "'.");
			}
		}
	}

	/**
	 * The US-ASCII bytes of the text <tt>&lt;?xml </tt> (including the
	 * trailing space), i.e. the opening of an XML declaration.
	 */
	private static final byte[] XML_DECLARATION_PREFIX = "<?xml "
			.getBytes(Charsets.US_ASCII);

	/**
	 * The number of HTML bytes to sniff for encoding info embedded in
	 * <tt>meta</tt> tags; relatively large because we don't have a fallback.
	 */
	private static final int SIZE_OF_HTML_CONTENT_SNIFFED = 4096;

	/**
	 * The number of XML bytes to sniff for encoding info embedded in the XML
	 * declaration; relatively small because it's always at the very beginning
	 * of the file.
	 */
	private static final int SIZE_OF_XML_CONTENT_SNIFFED = 512;

	/**
	 * Private constructor: disallows instantiation of this class from outside.
	 */
	private EncodingSniffer() {
		// Empty.
	}

	/**
	 * Sniffs the encoding of the specified content, choosing the strategy from
	 * the <tt>Content-Type</tt> found in the specified HTTP headers:
	 * <ul>
	 * <li>HTML content is handled by
	 * {@link #sniffHtmlEncoding(List, InputStream)}, which is based on the <a
	 * href=
	 * "http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding"
	 * >HTML5 encoding sniffing algorithm</a>;</li>
	 * <li>XML content is handled by
	 * {@link #sniffXmlEncoding(List, InputStream)}, which uses a custom
	 * algorithm;</li>
	 * <li>anything else is handled by
	 * {@link #sniffUnknownContentTypeEncoding(List, InputStream)}, which looks
	 * at the <tt>Content-Type</tt> header and at a <a
	 * href="http://en.wikipedia.org/wiki/Byte_Order_Mark">Byte Order Mark</a>
	 * in the content.</li>
	 * </ul>
	 *
	 * <p>
	 * Note that if an encoding is found but it is not supported on the current
	 * platform, this method returns <tt>null</tt>, as if no encoding had been
	 * found.
	 * </p>
	 *
	 * @param headers
	 *            the HTTP response headers sent back with the content to be
	 *            sniffed
	 * @param content
	 *            the content to be sniffed
	 * @return the encoding sniffed from the specified content and/or the
	 *         corresponding HTTP headers, or <tt>null</tt> if the encoding
	 *         could not be determined
	 * @throws IOException
	 *             if an IO error occurs
	 */
	public static String sniffEncoding(final List<Header> headers,
			final InputStream content) throws IOException {
		if (isHtml(headers)) {
			return sniffHtmlEncoding(headers, content);
		}
		if (isXml(headers)) {
			return sniffXmlEncoding(headers, content);
		}
		return sniffUnknownContentTypeEncoding(headers, content);
	}

	/**
	 * Tells whether the specified HTTP response headers describe an HTML
	 * response, i.e. whether their <tt>Content-Type</tt> ends with
	 * <tt>text/html</tt>.
	 *
	 * @param headers
	 *            the HTTP response headers
	 * @return <tt>true</tt> if the specified HTTP response headers indicate an
	 *         HTML response
	 */
	static boolean isHtml(final List<Header> headers) {
		final String htmlContentType = "text/html";
		return contentTypeEndsWith(headers, htmlContentType);
	}

	/**
	 * Tells whether the specified HTTP response headers describe an XML
	 * response, i.e. whether their <tt>Content-Type</tt> ends with
	 * <tt>text/xml</tt>, <tt>application/xml</tt>, <tt>text/vnd.wap.wml</tt>
	 * or <tt>+xml</tt>.
	 *
	 * @param headers
	 *            the HTTP response headers
	 * @return <tt>true</tt> if the specified HTTP response headers indicate an
	 *         XML response
	 */
	static boolean isXml(final List<Header> headers) {
		final String[] xmlContentTypeEndings = { "text/xml",
				"application/xml", "text/vnd.wap.wml", "+xml" };
		return contentTypeEndsWith(headers, xmlContentTypeEndings);
	}

	/**
	 * Returns <tt>true</tt> if the specified HTTP response headers contain a
	 * <tt>Content-Type</tt> that ends with one of the specified strings.
	 *
	 * <p>
	 * Only the first <tt>Content-Type</tt> header is examined. Its value is cut
	 * at the first <tt>;</tt> (dropping any parameters), trimmed and
	 * lower-cased before being compared with the lower-cased endings. A
	 * <tt>Content-Type</tt> header without a value matches nothing.
	 * </p>
	 *
	 * @param headers
	 *            the HTTP response headers
	 * @param contentTypeEndings
	 *            the content type endings to search for
	 * @return <tt>true</tt> if the specified HTTP response headers contain a
	 *         <tt>Content-Type</tt> that ends with one of the specified strings
	 */
	static boolean contentTypeEndsWith(final List<Header> headers,
			final String... contentTypeEndings) {
		for (final Header pair : headers) {
			if (!"content-type".equalsIgnoreCase(pair.getName())) {
				continue;
			}
			String value = pair.getValue();
			if (value == null) {
				// A header may carry no value at all; treat it as "no match"
				// instead of failing with a NullPointerException below.
				return false;
			}
			final int i = value.indexOf(';');
			if (i != -1) {
				value = value.substring(0, i);
			}
			value = value.trim().toLowerCase(Locale.ENGLISH);
			for (final String ending : contentTypeEndings) {
				if (value.endsWith(ending.toLowerCase(Locale.ENGLISH))) {
					return true;
				}
			}
			// The first Content-Type header decides; later ones are ignored.
			return false;
		}
		return false;
	}

	/**
	 * <p>
	 * Determines the encoding of HTML content, based on the <a href=
	 * "http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding"
	 * >HTML5 encoding sniffing algorithm</a>. The sources are consulted in this
	 * order, and the first one that yields an encoding wins: the HTTP headers,
	 * a Unicode Byte Order Mark at the start of the content, and finally a
	 * <tt>meta</tt> tag in the sniffed part of the content.
	 * </p>
	 *
	 * <p>
	 * Note that if an encoding is found but it is not supported on the current
	 * platform, this method returns <tt>null</tt>, as if no encoding had been
	 * found.
	 * </p>
	 *
	 * @param headers
	 *            the HTTP response headers sent back with the HTML content to
	 *            be sniffed
	 * @param content
	 *            the HTML content to be sniffed; if <tt>null</tt>, only the
	 *            headers are consulted
	 * @return the encoding sniffed from the specified HTML content and/or the
	 *         corresponding HTTP headers, or <tt>null</tt> if the encoding
	 *         could not be determined
	 * @throws IOException
	 *             if an IO error occurs
	 */
	public static String sniffHtmlEncoding(final List<Header> headers,
			final InputStream content) throws IOException {

		final String headerEncoding = sniffEncodingFromHttpHeaders(headers);
		if (headerEncoding != null || content == null) {
			return headerEncoding;
		}

		final byte[] leadingBytes = read(content, 3);
		final String bomEncoding = sniffEncodingFromUnicodeBom(leadingBytes);
		if (bomEncoding != null) {
			return bomEncoding;
		}

		// The bytes already consumed for the BOM check are handed back so the
		// meta tag scan sees the content from its beginning.
		final byte[] sniffedBytes = readAndPrepend(content,
				SIZE_OF_HTML_CONTENT_SNIFFED, leadingBytes);
		return sniffEncodingFromMetaTag(sniffedBytes);
	}

	/**
	 * <p>
	 * Determines the encoding of XML content using a custom algorithm. The
	 * sources are consulted in this order, and the first one that yields an
	 * encoding wins: the HTTP headers, a Unicode Byte Order Mark at the start
	 * of the content, and finally the XML declaration in the sniffed part of
	 * the content.
	 * </p>
	 *
	 * <p>
	 * Note that if an encoding is found but it is not supported on the current
	 * platform, this method returns <tt>null</tt>, as if no encoding had been
	 * found.
	 * </p>
	 *
	 * @param headers
	 *            the HTTP response headers sent back with the XML content to be
	 *            sniffed
	 * @param content
	 *            the XML content to be sniffed; if <tt>null</tt>, only the
	 *            headers are consulted
	 * @return the encoding sniffed from the specified XML content and/or the
	 *         corresponding HTTP headers, or <tt>null</tt> if the encoding
	 *         could not be determined
	 * @throws IOException
	 *             if an IO error occurs
	 */
	public static String sniffXmlEncoding(final List<Header> headers,
			final InputStream content) throws IOException {

		final String headerEncoding = sniffEncodingFromHttpHeaders(headers);
		if (headerEncoding != null || content == null) {
			return headerEncoding;
		}

		final byte[] leadingBytes = read(content, 3);
		final String bomEncoding = sniffEncodingFromUnicodeBom(leadingBytes);
		if (bomEncoding != null) {
			return bomEncoding;
		}

		// The bytes already consumed for the BOM check are handed back so the
		// declaration scan sees the content from its beginning.
		final byte[] sniffedBytes = readAndPrepend(content,
				SIZE_OF_XML_CONTENT_SNIFFED, leadingBytes);
		return sniffEncodingFromXmlDeclaration(sniffedBytes);
	}

	/**
	 * <p>
	 * Determines the encoding of content whose type is unknown. The
	 * <tt>Content-Type</tt> information in the HTTP headers is consulted first;
	 * if it yields nothing, the start of the content is checked for a <a
	 * href="http://en.wikipedia.org/wiki/Byte_Order_Mark">Byte Order Mark</a>.
	 * </p>
	 *
	 * <p>
	 * Note that if an encoding is found but it is not supported on the current
	 * platform, this method returns <tt>null</tt>, as if no encoding had been
	 * found.
	 * </p>
	 *
	 * @param headers
	 *            the HTTP response headers sent back with the content to be
	 *            sniffed
	 * @param content
	 *            the content to be sniffed; if <tt>null</tt>, only the headers
	 *            are consulted
	 * @return the encoding sniffed from the specified content and/or the
	 *         corresponding HTTP headers, or <tt>null</tt> if the encoding
	 *         could not be determined
	 * @throws IOException
	 *             if an IO error occurs
	 */
	public static String sniffUnknownContentTypeEncoding(
			final List<Header> headers, final InputStream content)
			throws IOException {

		final String headerEncoding = sniffEncodingFromHttpHeaders(headers);
		if (headerEncoding != null || content == null) {
			return headerEncoding;
		}

		return sniffEncodingFromUnicodeBom(read(content, 3));
	}

	/**
	 * Attempts to sniff an encoding from the specified HTTP headers. The
	 * <tt>Content-Type</tt> headers are examined in order, and the first one
	 * from which an encoding can be extracted determines the result, which is
	 * returned in upper case.
	 *
	 * @param headers
	 *            the HTTP headers to examine
	 * @return the encoding sniffed from the specified HTTP headers, or
	 *         <tt>null</tt> if the encoding could not be determined
	 */
	static String sniffEncodingFromHttpHeaders(final List<Header> headers) {
		for (final Header header : headers) {
			if (!"content-type".equalsIgnoreCase(header.getName())) {
				continue;
			}
			final String extracted = extractEncodingFromContentType(header
					.getValue());
			if (extracted == null) {
				continue;
			}
			final String encoding = extracted.toUpperCase(Locale.ENGLISH);
			if (LOG.isDebugEnabled()) {
				LOG.debug("Encoding found in HTTP headers: '" + encoding + "'.");
			}
			return encoding;
		}
		return null;
	}

	/**
	 * Attempts to sniff an encoding from a <a
	 * href="http://en.wikipedia.org/wiki/Byte_Order_Mark">Byte Order Mark</a>
	 * at the start of the specified byte array. The three-byte UTF-8 mark
	 * (<tt>EF BB BF</tt>) and the two-byte UTF-16 marks (<tt>FE FF</tt> for big
	 * endian, <tt>FF FE</tt> for little endian) are recognized.
	 *
	 * @param bytes
	 *            the bytes to check for a Byte Order Mark; may be <tt>null</tt>
	 * @return the encoding sniffed from the specified bytes, or <tt>null</tt>
	 *         if the encoding could not be determined
	 */
	static String sniffEncodingFromUnicodeBom(final byte[] bytes) {
		if (bytes == null) {
			return null;
		}

		String encoding = null;
		// 0xef, 0xbb, 0xbf
		if (bytes.length >= 3 && ((byte) 0xef) == bytes[0]
				&& ((byte) 0xbb) == bytes[1] && ((byte) 0xbf) == bytes[2]) {
			encoding = UTF8;
		}
		// The UTF-16 marks are only two bytes long, so two bytes of content
		// are enough to recognize them.
		else if (bytes.length >= 2) {
			// 0xfe, 0xff
			if (((byte) 0xfe) == bytes[0] && ((byte) 0xff) == bytes[1]) {
				encoding = UTF16_BE;
			}
			// 0xff, 0xfe
			else if (((byte) 0xff) == bytes[0] && ((byte) 0xfe) == bytes[1]) {
				encoding = UTF16_LE;
			}
		}

		if (encoding != null && LOG.isDebugEnabled()) {
			LOG.debug("Encoding found in Unicode Byte Order Mark: '" + encoding
					+ "'.");
		}
		return encoding;
	}

	/**
	 * Attempts to sniff an encoding from an HTML <tt>meta</tt> tag in the
	 * specified byte array.
	 *
	 * <p>
	 * The bytes are scanned from the start. Comments, other tags and
	 * <tt>&lt;!</tt>/<tt>&lt;/</tt>/<tt>&lt;?</tt> constructs are skipped. For
	 * each <tt>meta</tt> tag, its <tt>charset</tt> attribute and the charset
	 * embedded in its <tt>content</tt> attribute are considered; attribute
	 * names are matched case-insensitively. A declared UTF-16 encoding is
	 * replaced by UTF-8, since content whose <tt>meta</tt> tag could be read
	 * byte by byte cannot really be UTF-16. The first supported charset found
	 * is returned in upper case.
	 * </p>
	 *
	 * @param bytes
	 *            the bytes to check for an HTML <tt>meta</tt> tag
	 * @return the encoding sniffed from the specified bytes, or <tt>null</tt>
	 *         if the encoding could not be determined
	 */
	static String sniffEncodingFromMetaTag(final byte[] bytes) {
		for (int i = 0; i < bytes.length; i++) {
			if (matches(bytes, i, COMMENT_START)) {
				// Jump to the end of the comment; an unterminated comment
				// ends the scan.
				i = indexOfSubArray(bytes, new byte[] { '-', '-', '>' }, i);
				if (i == -1) {
					break;
				}
				i += 2;
			} else if (matches(bytes, i, META_START)) {
				i += META_START.length;
				for (Attribute att = getAttribute(bytes, i); att != null; att = getAttribute(
						bytes, i)) {
					i = att.getUpdatedIndex();
					final String name = att.getName();
					final String value = att.getValue();
					// getAttribute() returns attribute names exactly as
					// written, so they must be compared ignoring case for
					// <META CHARSET=...> to be recognized.
					String charset = null;
					if ("charset".equalsIgnoreCase(name)) {
						charset = value;
					} else if ("content".equalsIgnoreCase(name)) {
						charset = extractEncodingFromContentType(value);
					}
					if (charset == null) {
						continue;
					}
					if ("UTF-16".equalsIgnoreCase(charset)
							|| UTF16_BE.equalsIgnoreCase(charset)
							|| UTF16_LE.equalsIgnoreCase(charset)) {
						charset = UTF8;
					}
					if (isSupportedCharset(charset)) {
						charset = charset.toUpperCase(Locale.ENGLISH);
						if (LOG.isDebugEnabled()) {
							LOG.debug("Encoding found in meta tag: '"
									+ charset + "'.");
						}
						return charset;
					}
				}
			} else if ((i + 1 < bytes.length && bytes[i] == '<' && Character
					.isLetter(bytes[i + 1]))
					|| (i + 2 < bytes.length && bytes[i] == '<'
							&& bytes[i + 1] == '/' && Character
								.isLetter(bytes[i + 2]))) {
				// Start or end tag other than meta: skip the tag name, then
				// consume its attributes so that their content is not
				// mistaken for markup.
				i = skipToAnyOf(bytes, i, new byte[] { 0x09, 0x0A, 0x0C, 0x0D,
						0x20, 0x3E });
				if (i == -1) {
					break;
				}
				Attribute att;
				while ((att = getAttribute(bytes, i)) != null) {
					i = att.getUpdatedIndex();
				}
			} else if (matches(bytes, i, OTHER_START)) {
				// <!..., </... or <?...: skip up to the next '>'.
				i = skipToAnyOf(bytes, i, new byte[] { 0x3E });
				if (i == -1) {
					break;
				}
			}
		}
		return null;
	}

	/**
	 * Extracts an attribute from the specified byte array, starting at the
	 * specified index, using the <a href=
	 * "http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#concept-get-attributes-when-sniffing"
	 * >HTML5 attribute algorithm</a>.
	 *
	 * <p>
	 * Leading whitespace and slashes are skipped. The attribute name is
	 * collected byte by byte exactly as written (no case folding); in the
	 * attribute value, the ASCII letters <tt>A</tt>-<tt>Z</tt> are converted
	 * to lower case. The value may be double-quoted, single-quoted or
	 * unquoted, and is empty when the attribute has none. The index passed as
	 * the last constructor argument of the returned attribute is the position
	 * at which parsing stopped.
	 * </p>
	 *
	 * @param bytes
	 *            the byte array to extract an attribute from
	 * @param i
	 *            the index to start searching from
	 * @return the next attribute in the specified byte array, or <tt>null</tt>
	 *         if one is not available (the index is past the end, only
	 *         whitespace and slashes remain, or the next significant byte is
	 *         <tt>&gt;</tt>)
	 */
	static Attribute getAttribute(final byte[] bytes, int i) {
		if (i >= bytes.length) {
			return null;
		}
		// Skip tab, line feed, form feed, carriage return, space and '/'.
		while (bytes[i] == 0x09 || bytes[i] == 0x0A || bytes[i] == 0x0C
				|| bytes[i] == 0x0D || bytes[i] == 0x20 || bytes[i] == 0x2F) {
			i++;
			if (i >= bytes.length) {
				return null;
			}
		}
		// End of the tag: no further attribute.
		if (bytes[i] == '>') {
			return null;
		}
		final StringBuilder name = new StringBuilder();
		final StringBuilder value = new StringBuilder();
		// Collect the attribute name; the loop is left via "break" only when
		// an '=' was found, i.e. when a value follows.
		for (;; i++) {
			if (i >= bytes.length) {
				return new Attribute(name.toString(), value.toString(), i);
			}
			// An '=' ends the name, unless the name is still empty (then the
			// '=' becomes part of the name).
			if (bytes[i] == '=' && name.length() != 0) {
				i++;
				break;
			}
			if (bytes[i] == 0x09 || bytes[i] == 0x0A || bytes[i] == 0x0C
					|| bytes[i] == 0x0D || bytes[i] == 0x20) {
				// Whitespace after the name: skip it, then look for '='.
				while (bytes[i] == 0x09 || bytes[i] == 0x0A || bytes[i] == 0x0C
						|| bytes[i] == 0x0D || bytes[i] == 0x20) {
					i++;
					if (i >= bytes.length) {
						return new Attribute(name.toString(), value.toString(),
								i);
					}
				}
				// No '=' follows: the attribute has no value.
				if (bytes[i] != '=') {
					return new Attribute(name.toString(), value.toString(), i);
				}
				i++;
				break;
			}
			// '/' or '>' ends an attribute that has no value.
			if (bytes[i] == '/' || bytes[i] == '>') {
				return new Attribute(name.toString(), value.toString(), i);
			}
			name.append((char) bytes[i]);
		}
		if (i >= bytes.length) {
			return new Attribute(name.toString(), value.toString(), i);
		}
		// Skip whitespace between '=' and the value.
		while (bytes[i] == 0x09 || bytes[i] == 0x0A || bytes[i] == 0x0C
				|| bytes[i] == 0x0D || bytes[i] == 0x20) {
			i++;
			if (i >= bytes.length) {
				return new Attribute(name.toString(), value.toString(), i);
			}
		}
		if (bytes[i] == '"' || bytes[i] == '\'') {
			// Quoted value: read up to the matching quote, lower-casing A-Z.
			final byte b = bytes[i];
			for (i++; i < bytes.length; i++) {
				if (bytes[i] == b) {
					i++;
					return new Attribute(name.toString(), value.toString(), i);
				} else if (bytes[i] >= 'A' && bytes[i] <= 'Z') {
					final byte b2 = (byte) (bytes[i] + 0x20);
					value.append((char) b2);
				} else {
					value.append((char) bytes[i]);
				}
			}
			// The closing quote is missing: return what was collected.
			return new Attribute(name.toString(), value.toString(), i);
		} else if (bytes[i] == '>') {
			// '=' directly followed by the end of the tag: empty value.
			return new Attribute(name.toString(), value.toString(), i);
		} else if (bytes[i] >= 'A' && bytes[i] <= 'Z') {
			// First byte of an unquoted value, lower-cased.
			final byte b = (byte) (bytes[i] + 0x20);
			value.append((char) b);
			i++;
		} else {
			// First byte of an unquoted value.
			value.append((char) bytes[i]);
			i++;
		}
		// Rest of an unquoted value: ends at whitespace or '>'.
		for (; i < bytes.length; i++) {
			if (bytes[i] == 0x09 || bytes[i] == 0x0A || bytes[i] == 0x0C
					|| bytes[i] == 0x0D || bytes[i] == 0x20 || bytes[i] == 0x3E) {
				return new Attribute(name.toString(), value.toString(), i);
			} else if (bytes[i] >= 'A' && bytes[i] <= 'Z') {
				final byte b = (byte) (bytes[i] + 0x20);
				value.append((char) b);
			} else {
				value.append((char) bytes[i]);
			}
		}
		return new Attribute(name.toString(), value.toString(), i);
	}

	/**
	 * Looks for a <tt>charset</tt> parameter inside a <tt>Content-Type</tt>
	 * value, following <a
	 * href="http://ietfreport.isoc.org/idref/draft-abarth-mime-sniff/">the IETF
	 * algorithm</a>. The parameter value may be enclosed in double quotes, in
	 * single quotes, or be unquoted (in which case it ends at the next
	 * whitespace byte, at a semicolon, or at the end of the input).
	 *
	 * @param s
	 *            the <tt>Content-Type</tt> value to search for an encoding; may
	 *            be <tt>null</tt>
	 * @return the encoding named by the <tt>charset</tt> parameter, or
	 *         <tt>null</tt> if there is no such parameter, it is malformed, or
	 *         the named charset is not supported
	 */
	static String extractEncodingFromContentType(final String s) {
		if (s == null) {
			return null;
		}
		final byte[] bytes = s.getBytes(Charsets.US_ASCII);
		final int length = bytes.length;
		final byte[] whitespace = { 0x09, 0x0A, 0x0C, 0x0D, 0x20 };

		// Find the first occurrence of the charset token and step over it.
		int pos = 0;
		while (pos < length && !matches(bytes, pos, CHARSET_START)) {
			pos++;
		}
		if (pos == length) {
			return null;
		}
		pos += CHARSET_START.length;
		if (pos == length) {
			return null;
		}

		// Optional whitespace, then a mandatory '='.
		while (ArrayUtils.contains(whitespace, bytes[pos])) {
			if (++pos == length) {
				return null;
			}
		}
		if (bytes[pos] != '=') {
			return null;
		}
		if (++pos == length) {
			return null;
		}

		// Optional whitespace in front of the value.
		while (ArrayUtils.contains(whitespace, bytes[pos])) {
			if (++pos == length) {
				return null;
			}
		}

		final byte first = bytes[pos];
		final int from;
		final int to;
		if (first == '"' || first == '\'') {
			// Quoted value: everything up to the matching closing quote.
			from = pos + 1;
			if (length <= from) {
				return null;
			}
			to = ArrayUtils.indexOf(bytes, first, from);
			if (to == -1) {
				return null;
			}
		} else {
			// Unquoted value: runs up to whitespace, ';' or the end of input.
			from = pos;
			final int delimiter = skipToAnyOf(bytes, pos, new byte[] { 0x09,
					0x0A, 0x0C, 0x0D, 0x20, 0x3B });
			to = delimiter == -1 ? length : delimiter;
		}

		final String charset = new String(ArrayUtils.subarray(bytes, from, to),
				Charsets.US_ASCII);
		return isSupportedCharset(charset) ? charset : null;
	}

	/**
	 * Searches the specified XML content for an XML declaration and returns the
	 * encoding if found, otherwise returns <tt>null</tt>. A declaration whose
	 * <tt>encoding</tt> keyword is not followed by an opening quote, or whose
	 * quoted value is never closed, is treated as declaring no encoding.
	 *
	 * @param bytes
	 *            the XML content to sniff
	 * @return the encoding of the specified XML content, or <tt>null</tt> if it
	 *         could not be determined or is not supported on this platform
	 */
	static String sniffEncodingFromXmlDeclaration(final byte[] bytes) {
		String encoding = null;
		if (bytes.length > 5 && XML_DECLARATION_PREFIX[0] == bytes[0]
				&& XML_DECLARATION_PREFIX[1] == bytes[1]
				&& XML_DECLARATION_PREFIX[2] == bytes[2]
				&& XML_DECLARATION_PREFIX[3] == bytes[3]
				&& XML_DECLARATION_PREFIX[4] == bytes[4]
				&& XML_DECLARATION_PREFIX[5] == bytes[5]) {
			// Only the first '?' at or after offset 2 is considered; it must
			// be directly followed by '>' to close the declaration.
			final int index = ArrayUtils.indexOf(bytes, (byte) '?', 2);
			if (index != -1 && index + 1 < bytes.length
					&& bytes[index + 1] == '>') {
				final String declaration = new String(bytes, 0, index + 2,
						Charsets.US_ASCII);
				int start = declaration.indexOf("encoding");
				if (start != -1) {
					start += "encoding".length();
					final int length = declaration.length();
					// Advance to the opening quote. The scan is bounded because
					// a malformed declaration may not contain a quote at all.
					while (start < length
							&& declaration.charAt(start) != '"'
							&& declaration.charAt(start) != '\'') {
						start++;
					}
					if (start < length) {
						final char delimiter = declaration.charAt(start);
						start++;
						final int end = declaration.indexOf(delimiter, start);
						// Without a closing quote there is no usable value.
						if (end != -1) {
							encoding = declaration.substring(start, end);
						}
					}
				}
			}
		}
		if (encoding != null && !isSupportedCharset(encoding)) {
			encoding = null;
		}
		if (encoding != null && LOG.isDebugEnabled()) {
			LOG.debug("Encoding found in XML declaration: '" + encoding + "'.");
		}
		return encoding;
	}

	/**
	 * Returns <tt>true</tt> if the specified charset is supported on this
	 * platform. Names that are <tt>null</tt> or syntactically illegal are
	 * reported as unsupported instead of raising an exception.
	 *
	 * @param charset
	 *            the charset name to check; may be <tt>null</tt>
	 * @return <tt>true</tt> if the specified charset is supported on this
	 *         platform, <tt>false</tt> otherwise
	 */
	static boolean isSupportedCharset(final String charset) {
		// Charset.isSupported(null) throws a plain IllegalArgumentException,
		// which the catch below does not cover.
		if (charset == null) {
			return false;
		}
		try {
			return Charset.isSupported(charset);
		} catch (final IllegalCharsetNameException e) {
			return false;
		}
	}

	/**
	 * Tests whether the bytes beginning at the specified index match the
	 * specified pattern. The pattern is checked position by position: for every
	 * <tt>x</tt>, the byte at <tt>i + x</tt> must be equal to one of the bytes
	 * listed in <tt>sought[x]</tt>.
	 *
	 * @param bytes
	 *            the byte array to search in
	 * @param i
	 *            the index at which the pattern has to start
	 * @param sought
	 *            the pattern; each element lists the bytes accepted at that
	 *            position
	 * @return <tt>true</tt> if every pattern position is matched,
	 *         <tt>false</tt> if a position is not matched or the pattern would
	 *         extend past the end of <tt>bytes</tt>
	 */
	static boolean matches(final byte[] bytes, final int i,
			final byte[][] sought) {
		if (bytes.length < i + sought.length) {
			return false;
		}
		positions:
		for (int pos = 0; pos < sought.length; pos++) {
			for (final byte candidate : sought[pos]) {
				if (candidate == bytes[i + pos]) {
					continue positions;
				}
			}
			// None of the alternatives for this position matched.
			return false;
		}
		return true;
	}

	/**
	 * Scans forward from the specified index for the first byte that is one of
	 * the specified targets. If the end of the array is reached without a hit,
	 * <tt>-1</tt> is returned. A start index that is already greater than the
	 * array length is returned unchanged.
	 *
	 * @param bytes
	 *            the array to search through
	 * @param i
	 *            the index to start looking at
	 * @param targets
	 *            the bytes to search for
	 * @return the index of the first occurrence of any of the specified targets
	 *         within the specified array, or <tt>-1</tt> if the scan ran off
	 *         the end of the array
	 */
	static int skipToAnyOf(final byte[] bytes, int i, final byte[] targets) {
		while (i < bytes.length && !ArrayUtils.contains(targets, bytes[i])) {
			i++;
		}
		return i == bytes.length ? -1 : i;
	}

	/**
	 * Locates the first occurrence of the specified sub-array inside the
	 * specified array, considering only positions at or after the specified
	 * start index. Returns <tt>-1</tt> if there is no such occurrence.
	 *
	 * @param array
	 *            the array to search in
	 * @param subarray
	 *            the byte sequence to look for
	 * @param startIndex
	 *            the first position at which a match may begin
	 * @return the index at which the sub-array begins within the array, or
	 *         <tt>-1</tt> if it was not found
	 */
	static int indexOfSubArray(final byte[] array, final byte[] subarray,
			final int startIndex) {
		candidates:
		for (int start = startIndex; start < array.length
				&& start + subarray.length <= array.length; start++) {
			for (int offset = 0; offset < subarray.length; offset++) {
				if (array[start + offset] != subarray[offset]) {
					continue candidates;
				}
			}
			// Every byte of the sub-array matched at this position.
			return start;
		}
		return -1;
	}

	/**
	 * Attempts to read <tt>size</tt> bytes from the specified input stream.
	 * The stream is read repeatedly until either <tt>size</tt> bytes have been
	 * collected or the end of the stream is reached, so a stream that hands
	 * out its data in small pieces still yields as many bytes as it can
	 * provide. The returned byte array always has the exact length of the
	 * number of bytes read.
	 *
	 * @param content
	 *            the input stream to read from
	 * @param size
	 *            the number of bytes to try to read
	 * @return the bytes read from the specified input stream; empty if the
	 *         stream was already at its end
	 * @throws IOException
	 *             if an IO error occurs
	 */
	static byte[] read(final InputStream content, final int size)
			throws IOException {
		final byte[] buffer = new byte[size];
		int total = 0;
		// A single read() call may legally return fewer bytes than requested
		// even though more data follows, so keep reading until full or EOF.
		while (total < size) {
			final int count = content.read(buffer, total, size - total);
			if (count == -1) {
				break;
			}
			total += count;
		}
		if (total == size) {
			return buffer;
		}
		final byte[] exact = new byte[total];
		System.arraycopy(buffer, 0, exact, 0, total);
		return exact;
	}

	/**
	 * Reads up to <tt>size</tt> bytes from the specified input stream and
	 * returns them with the specified prefix placed in front. Fewer than
	 * <tt>size</tt> bytes may be read; the returned array is always exactly as
	 * long as the prefix plus the number of bytes actually read.
	 *
	 * @param content
	 *            the input stream to read from
	 * @param size
	 *            the number of bytes to try to read
	 * @param prefix
	 *            the bytes to place in front of the bytes read from the stream
	 * @return a new array holding the prefix followed by the bytes read
	 * @throws IOException
	 *             if an IO error occurs
	 */
	static byte[] readAndPrepend(final InputStream content, final int size,
			final byte[] prefix) throws IOException {
		final byte[] tail = read(content, size);
		final int prefixLength = prefix.length;
		final int tailLength = tail.length;
		final byte[] combined = new byte[prefixLength + tailLength];
		// The two copies write to disjoint regions of the result.
		System.arraycopy(tail, 0, combined, prefixLength, tailLength);
		System.arraycopy(prefix, 0, combined, 0, prefixLength);
		return combined;
	}

	/**
	 * Immutable holder for one parsed attribute: its name, its value and the
	 * index that was passed along with them when the attribute was created.
	 */
	static class Attribute {
		/** The attribute name. */
		private final String name_;
		/** The attribute value. */
		private final String value_;
		/** The index supplied together with the name and value. */
		private final int updatedIndex_;

		/**
		 * Creates a new attribute.
		 *
		 * @param name
		 *            the attribute name
		 * @param value
		 *            the attribute value
		 * @param updatedIndex
		 *            the index to report from {@link #getUpdatedIndex()}
		 */
		Attribute(final String name, final String value, final int updatedIndex) {
			name_ = name;
			value_ = value;
			updatedIndex_ = updatedIndex;
		}

		/**
		 * Returns the attribute name.
		 *
		 * @return the name given at construction
		 */
		String getName() {
			return name_;
		}

		/**
		 * Returns the attribute value.
		 *
		 * @return the value given at construction
		 */
		String getValue() {
			return value_;
		}

		/**
		 * Returns the index stored with this attribute.
		 *
		 * @return the index given at construction
		 */
		int getUpdatedIndex() {
			return updatedIndex_;
		}
	}

	/**
	 * Translates the given encoding label into a normalized form according to
	 * <a href="http://encoding.spec.whatwg.org/#encodings">Reference</a>.
	 * Leading and trailing whitespace is ignored and the lookup is
	 * case-insensitive. If the label already is the normalized name (apart from
	 * letter case), the label is returned in the caller's spelling with the
	 * surrounding whitespace removed.
	 * 
	 * @param encodingLabel
	 *            the label to translate; may be <tt>null</tt>
	 * @return the normalized encoding name, or <tt>null</tt> if the label is
	 *         <tt>null</tt> or not found
	 */
	public static String translateEncodingLabel(final String encodingLabel) {
		if (null == encodingLabel) {
			return null;
		}
		final String trimmed = encodingLabel.trim();
		final String encLC = trimmed.toLowerCase(Locale.ENGLISH);
		final String enc = ENCODING_FROM_LABEL.get(encLC);
		if (encLC.equals(enc)) {
			// Keep the caller's letter case, but not the whitespace that was
			// stripped for the lookup.
			return trimmed;
		}
		return enc;
	}
}
